* --------------------------------------------------------------------------------------------------
* Unwind_MF_Positions_Aggregate / Unwind_MF_Positions, Step 4
*
* All files in this folder (unwind) handle the unwinding of funds' positions in other funds. This
* procedure is referred to as "fund-in-fund" unwinding. If a given fund A holds a position in a
* different fund B, we refer to A as the "holding fund", and to B as the "investing fund".
*
* The holdings data produced by Steps 2 and 3 of the procedure is split up into half-yearly files.
* This job aggregates those half-yearly files to a yearly frequency.
* --------------------------------------------------------------------------------------------------

* The reference year is supplied as the first positional argument of this do-file.
local year = `1'

* From 2018 onwards the raw files behind the HD files no longer carry region_mstar, and the status
* variable is called Status instead of status_mstar. This depends only on the year, so it is set
* once here rather than once per universe.
if `year' < 2018 {
	local region_vars "region region_mstar"
	local region_vars_str `" "region" "region_mstar" "'
	local status_var "status_mstar"
}
else {
	local region_vars "region"
	local region_vars_str `" "region" "'
	local status_var "Status"
}

* --------------------------------------------------------------------------------------------------
* Fund-level characteristics
*
* These are read from the pre-unwind HD file, whose path does not depend on the universe, so the
* table is built once and then written out under both universe names. The maturitydate column is
* not loaded: it was previously cleaned and then discarded before anything used it.
* --------------------------------------------------------------------------------------------------
use MasterPortfolioId fundtype_mstar `status_var' DomicileCountryId BroadCategoryGroup `region_vars' using "$morningstar_hd/HD_`year'_m_PreUnwind.dta", clear
duplicates drop

* Keep one status per combination of fund characteristics. The status variable does not survive
* the next collapse, so this step only reduces the number of rows.
* NOTE(review): this and the following collapses rely on collapse (firstnm) honouring the order
* set by gsort within each by-group -- confirm.
gsort `status_var'
collapse (firstnm) `status_var', by(MasterPortfolioId fundtype_mstar  DomicileCountryId BroadCategoryGroup `region_vars')

* Certain funds appear as both FO and FE because we get portfolio reports for these funds in both
* the FO and FE universes from Morningstar. For any given row, default to FO.
gsort - fundtype_mstar
collapse (firstnm) fundtype_mstar, by(MasterPortfolioId DomicileCountryId BroadCategoryGroup `region_vars')

* Make sure MPID is unique
duplicates tag MasterPortfolioId, generate(_dup)
cap assert _dup == 0
if !_rc {
	* The assertion held
	di "(1st Check) MPID is indeed unique"
}
else {
	quietly count if _dup > 0
	di "(1st Check) MPID is not unique, number of MPID duplicates: `r(N)'."

	* Certain funds' region appears as both US and Rest because we get portfolio reports for these
	* funds from both the US and NonUS universes from Morningstar. For any given row, default to US.
	* NOTE(review): the minus sign applies to the first region variable only; before 2018 the
	* second one (region_mstar) is sorted ascending -- confirm this is intended.
	gsort - `region_vars'
	collapse (firstnm) `region_vars', by(MasterPortfolioId DomicileCountryId BroadCategoryGroup fundtype_mstar)

	* Check again whether MPID is unique (the collapse above removed the first _dup)
	duplicates tag MasterPortfolioId, generate(_dup)
	cap assert _dup == 0
	if !_rc {
		di "(2nd Check) MPID is indeed unique"
	}
	else {
		quietly count if _dup > 0
		di "(2nd Check) MPID is not unique, number of MPID duplicates: `r(N)'."
		* Last resort: keep an arbitrary row per MPID
		duplicates drop MasterPortfolioId, force
	}
}
drop _dup

* Write the same table under each universe name, as the merge below expects
foreach holdingname in "NonUS" "US" {
	save "$temp/mf_unwind/hd_period_info/`holdingname'_`year'_fund_characteristics.dta", replace
}

* --------------------------------------------------------------------------------------------------
* Per-universe aggregation of the two half-yearly files into one step5 file
* --------------------------------------------------------------------------------------------------
foreach holdingname in "NonUS" "US" {

	di "Processing `holdingname' data"

	* Special care of the first year, which reads the step4 file for H1; the original author
	* notes having verified that there are no positions to be unwound in that case
	if `year' == $firstyear {
		use "$temp/mf_unwind/tmp_hd_files/`holdingname'_`year'_h1_m_step4.dta", clear
	}
	else {
		use "$temp/mf_unwind/hd_period_info/`holdingname'_`year'_h1_m_step53.dta", clear
	}

	* Appending H2 stays best-effort, but a failure is now reported instead of being hidden
	capture append using "$temp/mf_unwind/hd_period_info/`holdingname'_`year'_h2_m_step53.dta", force
	local append_rc = _rc
	if `append_rc' != 0 {
		di as error "Warning: could not append H2 step53 file for `holdingname' `year' (rc = `append_rc'); continuing with H1 only"
	}

	* Convert the datetime to a daily date and rebuild the monthly date from it
	replace date = dofc(date)
	format date %td
	* Account for the fact that NonUS step53 datasets can be empty for years up to 1997
	cap drop date_m
	gen date_m = mofd(date)
	format date_m %tm
	cap drop index
	cap drop _merge

	* Merge with fund characteristics; reconstruct these
	mmerge MasterPortfolioId using "$temp/mf_unwind/hd_period_info/`holdingname'_`year'_fund_characteristics.dta", uname(u_) unmatched(m)

	* For earlier years up to 1997, where we drop the region NonUS duplicates, the dataset can be
	* empty with zero observations and lack the variables listed below in the master data
	quietly count
	local n_obs = r(N)

	if `n_obs' > 0 {
		* Unwound rows take their characteristics from the fund-characteristics table
		foreach var in "DomicileCountryId" "BroadCategoryGroup" "fundtype_mstar" `region_vars_str' {
			replace `var' = u_`var' if mf_unwound == 1
		}
	}
	drop u_*
	cap drop _merge

	if `n_obs' > 0 {
		* Sanity check: each MPID must map to a single DomicileCountryId. The data are parked in
		* a temp file (removed afterwards) because preserve/restore requires more memory.
		save "$temp/mf_unwind/`holdingname'_`year'_m_step5_check.dta", replace
		gen _ones = 1
		collapse (sum) _ones, by(MasterPortfolioId DomicileCountryId)
		replace _ones = 1
		collapse (sum) _ones, by(MasterPortfolioId)

		* There are two MPIDs in 2020 that will fail the assertion
		if `year' == 2020 {
			cap assert _ones == 1
		}
		else {
			assert _ones == 1
		}
		use "$temp/mf_unwind/`holdingname'_`year'_m_step5_check.dta", clear
		capture rm "$temp/mf_unwind/`holdingname'_`year'_m_step5_check.dta"
	}

	* Save the step5 files
	save "$temp/mf_unwind/`holdingname'_`year'_m_step5.dta", replace
}

* Stack the US and NonUS step5 files (in that order) into the final monthly HD file
clear
append using "$temp/mf_unwind/US_`year'_m_step5.dta" "$temp/mf_unwind/NonUS_`year'_m_step5.dta"
compress
save "$morningstar_hd/HD_`year'_m.dta", replace

* Quarterly version: keep the quarter-end months (March, June, September, December) of the monthly
* file and stamp each row with its quarterly date.
use "$morningstar_hd/HD_`year'_m.dta", clear

* The output never carries a month variable, so drop one if the input has it
cap drop month
keep if inlist(month(date), 3, 6, 9, 12)

* qofd maps the daily date straight to a quarterly date, so no scratch quarter or year variables
* are created; the block no longer aborts when the data already contain variables with those names.
gen date_q = qofd(date)
format date_q %tq
compress
save "$morningstar_hd/HD_`year'_q.dta", replace

* Yearly version: keep only the December rows of the monthly file and stamp them with the year
use "$morningstar_hd/HD_`year'_m.dta", clear

* The output never carries a month variable, so drop one if the input has it
capture drop month
keep if month(date) == 12

generate date_y = yofd(date)
format date_y %ty
compress
save "$morningstar_hd/HD_`year'_y.dta", replace
